/*
  author Sylvain Bertrand <sylvain.bertrand@gmail.com>
  Protected by linux GNU GPLv2
  Copyright 2012-2014
*/
#include <linux/pci.h>
#include <linux/cdev.h>
#include <asm/unaligned.h>

#include <alga/rng_mng.h>
#include <uapi/alga/pixel_fmts.h>
#include <alga/timing.h>
#include <alga/amd/atombios/atb.h>
#include <uapi/alga/amd/dce6/dce6.h>
#include <alga/amd/atombios/vm.h>
#include <alga/amd/atombios/cm.h>
#include <alga/amd/atombios/pp.h>
#include <alga/amd/atombios/vram_info.h>

#include "../mc.h"
#include "../rlc.h"
#include "../ih.h"
#include "../fence.h"
#include "../ring.h"
#include "../dmas.h"
#include "../ba.h"
#include "../cps.h"
#include "../gpu.h"
#include "../drv.h"

#include "../bif.h"

#include "../cm.h"
#include "../smc.h"
#include "../smc_tbls.h"

#include "../regs.h"

#include "dyn_pm.h"
#include "ctx.h"
#include "private.h"
#include "smc_tbls.h"
#include "smc_sw_regs.h"
#include "smc_cac_cfg_regs.h"
#include "pwrtune.h"
#include "ulv.h"
#include "driver.h"

/*
 * Compute the engine pll register values for the requested engine clock.
 * ctx:     dynamic power management context
 * eng_clk: requested engine clock, in 10 kHz units
 * eng_pll: output, filled with the computed register values
 * Returns 0 on success, -SI_ERR if atombios cannot provide pll parameters.
 */
long eng_pll_compute(struct ctx *ctx, u32 eng_clk, struct eng_pll *eng_pll)
{
	struct dev_drv_data *dd;
	struct atb_eng_pll atb_eng_pll;
	long r;
	u32 ref_div;
	u64 tmp;
	u32 fb_div;

	dd = pci_get_drvdata(ctx->dev);

	r = atb_eng_pll_compute(dd->atb, eng_clk, &atb_eng_pll);
	if (r == -ATB_ERR) {
		dev_err(&ctx->dev->dev, "dyn_pm:unable to compute the parameters of the engine pll\n");
		return -SI_ERR;
	}

	/*--------------------------------------------------------------------*/
	/* compute pll feedback divider (14 bits fractional fixed point) */
	ref_div = 1 + atb_eng_pll.ref_div;

	/*
	 * clocks are in 10 kHz units; widen to 64 bits *before* the multiply
	 * and shift: eng_clk * ref_div * post_div << 14 overflows a u32 for
	 * realistic clock values
	 */
	tmp = ((u64)eng_clk * ref_div * atb_eng_pll.post_div) << 14;

	do_div(tmp, ctx->core_ref_clk);
	fb_div = (u32)tmp;
	/*--------------------------------------------------------------------*/

	/*====================================================================*/
	/* compute engine pll registers, starting from the saved defaults */
	eng_pll->cg_eng_pll_func_ctl_0 = dd->pp.clks_regs.cg_eng_pll_func_ctl_0;
	eng_pll->cg_eng_pll_func_ctl_0 &= ~(CEPFC0_PDIV_A | CEPFC0_REF_DIV);
	eng_pll->cg_eng_pll_func_ctl_0 |= set(CEPFC0_REF_DIV,
							atb_eng_pll.ref_div);
	eng_pll->cg_eng_pll_func_ctl_0 |= set(CEPFC0_PDIV_A,
							atb_eng_pll.post_div);

	eng_pll->cg_eng_pll_func_ctl_1 = dd->pp.clks_regs.cg_eng_pll_func_ctl_1;
	eng_pll->cg_eng_pll_func_ctl_1 &= ~CEPFC1_MUX_SEL;
	eng_pll->cg_eng_pll_func_ctl_1 |= set(CEPFC1_MUX_SEL, 2);

	eng_pll->cg_eng_pll_func_ctl_2 = dd->pp.clks_regs.cg_eng_pll_func_ctl_2;
	eng_pll->cg_eng_pll_func_ctl_2 &= ~CEPFC2_FB_DIV;
	eng_pll->cg_eng_pll_func_ctl_2 |= set(CEPFC2_FB_DIV, fb_div);
	eng_pll->cg_eng_pll_func_ctl_2 |= CEPFC2_DITH_ENA;

	eng_pll->cg_eng_pll_func_ctl_3 = dd->pp.clks_regs.cg_eng_pll_func_ctl_3;

	/*--------------------------------------------------------------------*/
	/* related to spread spectrum */
	/*--------------------------------------------------------------------*/
	eng_pll->cg_eng_pll_ss_0 = dd->pp.clks_regs.cg_eng_pll_ss_0;
	eng_pll->cg_eng_pll_ss_1 = dd->pp.clks_regs.cg_eng_pll_ss_1;

	if (ctx->misc_caps & MISC_CAPS_ENG_CLK_SS_ENA) {
		struct atb_ss atb_ss;
		u32 vco_freq;

		vco_freq = eng_clk * atb_eng_pll.post_div;

		r = atb_eng_clk_ss_get(dd->atb, vco_freq, &atb_ss);
		if (r == -ATB_ERR) {
			/* missing ss parameters are not fatal: keep defaults */
			dev_warn(&ctx->dev->dev, "dyn_pm:unable to find spread spectrum parameters for the vco frequency of %u kHz\n",
								vco_freq * 10);
		} else {
			u32 clk_s;
			u32 clk_v;

			clk_s = ctx->core_ref_clk * 5 / (ref_div * atb_ss.rate);
			clk_v = 4 * atb_ss.percentage * fb_div
							/ (clk_s * 10000);
			eng_pll->cg_eng_pll_ss_0 &= ~CEPS0_CLK_S;
			eng_pll->cg_eng_pll_ss_0 |= set(CEPS0_CLK_S, clk_s);
			eng_pll->cg_eng_pll_ss_0 |= CEPS0_SS_ENA;

			eng_pll->cg_eng_pll_ss_1 &= ~CEPS1_CLK_V;
			eng_pll->cg_eng_pll_ss_1 |= set(CEPS1_CLK_V, clk_v);
		}
	}
	/*--------------------------------------------------------------------*/
	/*====================================================================*/
	return 0;
}

/*
 * Clamp the requested pcie generation to what the pcie root supports.
 * pcie_gen: the requested generation (BIF_PCIE_GEN_*)
 * Returns a generation supported by the root; falls back (with a warning)
 * to the default generation saved at init when nothing matches.
 */
u8 pcie_speed_cap(struct ctx *ctx, u8 pcie_gen)
{
	struct dev_drv_data *dd;
	dd = pci_get_drvdata(ctx->dev);

	switch (pcie_gen) {
	case BIF_PCIE_GEN_1:
		if (ctx->pcie_root_speeds_mask & BIF_PCIE_ROOT_GEN_1) {
			return BIF_PCIE_GEN_1;
		} else {
			if (ctx->pcie_root_speeds_mask & BIF_PCIE_ROOT_GEN_2) {
				dev_warn(&ctx->dev->dev, "dyn_pm:pcie:power shortage:using pcie gen 2 instead of pcie gen 1\n");
				return BIF_PCIE_GEN_2;
			}
			if (ctx->pcie_root_speeds_mask & BIF_PCIE_ROOT_GEN_3) {
				dev_warn(&ctx->dev->dev, "dyn_pm:pcie:power shortage:using pcie gen 3 instead of pcie gen 1\n");
				return BIF_PCIE_GEN_3;
			}
			goto exit_default;
		}
	case BIF_PCIE_GEN_2:
		if (ctx->pcie_root_speeds_mask & BIF_PCIE_ROOT_GEN_2) {
			return BIF_PCIE_GEN_2;
		} else {
			if (ctx->pcie_root_speeds_mask & BIF_PCIE_ROOT_GEN_1)
				return BIF_PCIE_GEN_1;

			if (ctx->pcie_root_speeds_mask & BIF_PCIE_ROOT_GEN_3) {
				dev_warn(&ctx->dev->dev, "dyn_pm:pcie:power shortage:using pcie gen 3 instead of pcie gen 2\n");
				return BIF_PCIE_GEN_3;
			}
			goto exit_default;
		}
	case BIF_PCIE_GEN_3:
		if (ctx->pcie_root_speeds_mask & BIF_PCIE_ROOT_GEN_3) {
			return BIF_PCIE_GEN_3;
		} else {
			if (ctx->pcie_root_speeds_mask & BIF_PCIE_ROOT_GEN_2)
				return BIF_PCIE_GEN_2;
			if (ctx->pcie_root_speeds_mask & BIF_PCIE_ROOT_GEN_1)
				return BIF_PCIE_GEN_1;
			goto exit_default;
		}
	default:
		/* unknown requested generation: use the default one */
		break;
	}

exit_default:
	dev_warn(&ctx->dev->dev, "dyn_pm:pcie:power shortage:using pcie gen %u instead of undefined pcie gen\n",
						dd->pp.default_pcie_gen + 1);
	return dd->pp.default_pcie_gen;
}

static void mc_lp_regs_init(struct pci_dev *dev)
{
	LOG("initing low power memory controler registers");
	wr32(dev, rr32(dev, MC_SEQ_RAS_TIMING), MC_SEQ_RAS_TIMING_LP);
	wr32(dev, rr32(dev, MC_SEQ_CAS_TIMING), MC_SEQ_CAS_TIMING_LP);
	wr32(dev, rr32(dev, MC_SEQ_MISC_TIMING_0), MC_SEQ_MISC_TIMING_0_LP);
	wr32(dev, rr32(dev, MC_SEQ_MISC_TIMING_1), MC_SEQ_MISC_TIMING_1_LP);
	wr32(dev, rr32(dev, MC_SEQ_PMG_CMD_EMRS), MC_SEQ_PMG_CMD_EMRS_LP);
	wr32(dev, rr32(dev, MC_SEQ_PMG_CMD_MRS_0), MC_SEQ_PMG_CMD_MRS_0_LP);
	wr32(dev, rr32(dev, MC_SEQ_PMG_CMD_MRS_1), MC_SEQ_PMG_CMD_MRS_1_LP);
	wr32(dev, rr32(dev, MC_SEQ_PMG_CMD_MRS_2), MC_SEQ_PMG_CMD_MRS_2_LP);
	wr32(dev, rr32(dev, MC_SEQ_WR_CTL_D0), MC_SEQ_WR_CTL_D0_LP);
	wr32(dev, rr32(dev, MC_SEQ_WR_CTL_D1), MC_SEQ_WR_CTL_D1_LP);
	wr32(dev, rr32(dev, MC_SEQ_RD_CTL_D0), MC_SEQ_RD_CTL_D0_LP);
	wr32(dev, rr32(dev, MC_SEQ_RD_CTL_D1), MC_SEQ_RD_CTL_D1_LP);
	wr32(dev, rr32(dev, MC_SEQ_PMG_TIMING), MC_SEQ_PMG_TIMING_LP);
	wr32(dev, rr32(dev, MC_SEQ_WR_CTL_2), MC_SEQ_WR_CTL_2_LP);
}

/* turn on the voltage power management bit of GENERAL_PM */
static void general_volt_pm_ena(struct pci_dev *dev)
{
	u32 val = rr32(dev, GENERAL_PM) | GP_VOLT_PM_ENA;

	LOG("enable voltage power management, GENERAL_PM=0x%08x", val);
	wr32(dev, val, GENERAL_PM);
}

/* turn off the voltage power management bit of GENERAL_PM */
static void general_volt_pm_dis(struct pci_dev *dev)
{
	u32 val = rr32(dev, GENERAL_PM) & ~GP_VOLT_PM_ENA;

	LOG("disable voltage power management, GENERAL_PM=0x%08x", val);
	wr32(dev, val, GENERAL_PM);
}

/* turn on the dynamic spread spectrum bit of GENERAL_PM */
static void general_ss_ena(struct pci_dev *dev)
{
	u32 val = rr32(dev, GENERAL_PM) | GP_DYN_SPREAD_SPECTRUM_ENA;

	LOG("enable spread spectrum, GENERAL_PM=0x%08x", val);
	wr32(dev, val, GENERAL_PM);
}

/* turn off the dynamic spread spectrum bit of GENERAL_PM */
static void general_ss_dis(struct pci_dev *dev)
{
	u32 val = rr32(dev, GENERAL_PM) & ~GP_DYN_SPREAD_SPECTRUM_ENA;

	LOG("disable spread spectrum, GENERAL_PM=0x%08x", val);
	wr32(dev, val, GENERAL_PM);
}

/* thermal protection is active low: clear the disable bit to enable it */
static void general_thermal_protection_ena(struct pci_dev *dev)
{
	u32 val = rr32(dev, GENERAL_PM) & ~GP_THERMAL_PROTECTION_DIS;

	LOG("enable thermal protection, GENERAL_PM=0x%08x", val);
	wr32(dev, val, GENERAL_PM);
}

/* thermal protection is active low: set the disable bit to disable it */
static void general_thermal_protection_dis(struct pci_dev *dev)
{
	u32 val = rr32(dev, GENERAL_PM) | GP_THERMAL_PROTECTION_DIS;

	LOG("disable thermal protection, GENERAL_PM=0x%08x", val);
	wr32(dev, val, GENERAL_PM);
}


/*
 * Program the CG_B_SP register with the value computed at context init.
 * NOTE(review): the ctx field is named d_sp while the register is CG_B_SP —
 * confirm the field name is intentional.
 */
static void b_sp_program(struct ctx *ctx)
{
	LOG("b_sp programming, CG_B_SP=0x%08x", ctx->d_sp);
	wr32(ctx->dev, ctx->d_sp, CG_B_SP);
}

#define GICST_DEFAULT 0x200
/* program the default interval value into the GICST field of CG_GIT */
static void git_program(struct pci_dev *dev)
{
	u32 val;

	val = rr32(dev, CG_GIT);
	val = (val & ~CG_GICST) | set(CG_GICST, GICST_DEFAULT);

	LOG("git programming, CG_GIT=0x%08x", val);
	wr32(dev, val, CG_GIT);
}

/* start of tp programming----------------------------------------------------*/
#define TD_AUTO	0
#define TD_UP	1
#define TD_DOWN	2
#define TD_DEFAULT TD_AUTO

#define TCS_N 15

#define TP_UTC_DEFAULT_00	0x24
#define TP_UTC_DEFAULT_01	0x22
#define TP_UTC_DEFAULT_02	0x22
#define TP_UTC_DEFAULT_03	0x22
#define TP_UTC_DEFAULT_04	0x22
#define TP_UTC_DEFAULT_05	0x22
#define TP_UTC_DEFAULT_06	0x22
#define TP_UTC_DEFAULT_07	0x22
#define TP_UTC_DEFAULT_08	0x22
#define TP_UTC_DEFAULT_09	0x22
#define TP_UTC_DEFAULT_10	0x22
#define TP_UTC_DEFAULT_11	0x22
#define TP_UTC_DEFAULT_12	0x22
#define TP_UTC_DEFAULT_13	0x22
#define TP_UTC_DEFAULT_14	0x22
#define TP_DTC_DEFAULT_00	0x24
#define TP_DTC_DEFAULT_01	0x22
#define TP_DTC_DEFAULT_02	0x22
#define TP_DTC_DEFAULT_03	0x22
#define TP_DTC_DEFAULT_04	0x22
#define TP_DTC_DEFAULT_05	0x22
#define TP_DTC_DEFAULT_06	0x22
#define TP_DTC_DEFAULT_07	0x22
#define TP_DTC_DEFAULT_08	0x22
#define TP_DTC_DEFAULT_09	0x22
#define TP_DTC_DEFAULT_10	0x22
#define TP_DTC_DEFAULT_11	0x22
#define TP_DTC_DEFAULT_12	0x22
#define TP_DTC_DEFAULT_13	0x22
#define TP_DTC_DEFAULT_14	0x22

/* per throttle control step "up" coefficients, one per CG_FFCT_* register
 * (UTC field) — see tp_program() */
u32 tp_utc[TCS_N] = {
	TP_UTC_DEFAULT_00,
	TP_UTC_DEFAULT_01,
	TP_UTC_DEFAULT_02,
	TP_UTC_DEFAULT_03,
	TP_UTC_DEFAULT_04,
	TP_UTC_DEFAULT_05,
	TP_UTC_DEFAULT_06,
	TP_UTC_DEFAULT_07,
	TP_UTC_DEFAULT_08,
	TP_UTC_DEFAULT_09,
	TP_UTC_DEFAULT_10,
	TP_UTC_DEFAULT_11,
	TP_UTC_DEFAULT_12,
	TP_UTC_DEFAULT_13,
	TP_UTC_DEFAULT_14,
};

/* per throttle control step "down" coefficients, one per CG_FFCT_* register
 * (DTC field) — see tp_program() */
u32 tp_dtc[TCS_N] = {
	TP_DTC_DEFAULT_00,
	TP_DTC_DEFAULT_01,
	TP_DTC_DEFAULT_02,
	TP_DTC_DEFAULT_03,
	TP_DTC_DEFAULT_04,
	TP_DTC_DEFAULT_05,
	TP_DTC_DEFAULT_06,
	TP_DTC_DEFAULT_07,
	TP_DTC_DEFAULT_08,
	TP_DTC_DEFAULT_09,
	TP_DTC_DEFAULT_10,
	TP_DTC_DEFAULT_11,
	TP_DTC_DEFAULT_12,
	TP_DTC_DEFAULT_13,
	TP_DTC_DEFAULT_14,
};

/*
 * Program the throttling coefficients: one up/down pair per CG_FFCT_*
 * register, then select the trend detection mode in ENG_CLK_PM_CTL.
 */
static void tp_program(struct pci_dev *dev)
{
	u8 i;
	u32 eng_clk_pm_ctl;
	u8 td;	/* trend detection mode, compile-time TD_DEFAULT (TD_AUTO) */

	LOG("tp programming");

	/* the CG_FFCT_* registers are consecutive, 4 bytes apart */
	for (i = 0; i < TCS_N; ++i) {
		LOG("CG_FFCT_%u=0x%08x", i,
			set(CF_UTC_0, tp_utc[i]) | set(CF_DTC_0, tp_dtc[i]));
		wr32(dev, set(CF_UTC_0, tp_utc[i]) | set(CF_DTC_0, tp_dtc[i]),
							CG_FFCT_00 + i * 4);
	}

	td = TD_DEFAULT;

	/* TD_AUTO lets the hardware pick the trend by itself */
	if (td == TD_AUTO) {
		eng_clk_pm_ctl = rr32(dev, ENG_CLK_PM_CTL);
		eng_clk_pm_ctl &= ~ECPC_FIR_FORCE_TREND_SEL;
		LOG("TD_AUTO, ENG_CLK_PM_CTL=0x%08x", eng_clk_pm_ctl);
		wr32(dev, eng_clk_pm_ctl, ENG_CLK_PM_CTL);
	} else {
		eng_clk_pm_ctl = rr32(dev, ENG_CLK_PM_CTL);
		eng_clk_pm_ctl |= ECPC_FIR_FORCE_TREND_SEL;
		LOG("!TD_AUTO, ENG_CLK_PM_CTL=0x%08x", eng_clk_pm_ctl);
		wr32(dev, eng_clk_pm_ctl, ENG_CLK_PM_CTL);
	}

	/* with TD_DEFAULT == TD_AUTO both branches below are currently dead;
	 * kept for the day td becomes configurable */
	if (td == TD_UP) {
		eng_clk_pm_ctl = rr32(dev, ENG_CLK_PM_CTL);
		eng_clk_pm_ctl &= ~ECPC_FIR_TREND_MODE;
		LOG("TD_UP, ENG_CLK_PM_CTL=0x%08x", eng_clk_pm_ctl);
		wr32(dev, eng_clk_pm_ctl, ENG_CLK_PM_CTL);
	} else if (td == TD_DOWN) {
		eng_clk_pm_ctl = rr32(dev, ENG_CLK_PM_CTL);
		eng_clk_pm_ctl |= ECPC_FIR_TREND_MODE;
		LOG("!TD_UP, ENG_CLK_PM_CTL=0x%08x", eng_clk_pm_ctl);
		wr32(dev, eng_clk_pm_ctl, ENG_CLK_PM_CTL);
	}
}
/* end of tp programming------------------------------------------------------*/

#define TPC_DEFAULT 0x200
static void tpp_program(struct pci_dev *dev)
{
	LOG("tpp programming, CG_TPC=0x%08x", TPC_DEFAULT);	

	wr32(dev, TPC_DEFAULT, CG_TPC);
}

#define SSTP_SSTU_DEFAULT	0
#define SSTP_SST_DEFAULT	0x00c8
static void sstp_program(struct pci_dev *dev)
{
	LOG("sstp programming, CG_SSP=0x%08x", set(CS_SSTU, SSTP_SSTU_DEFAULT)
					| set(CS_SST, SSTP_SST_DEFAULT));	
	wr32(dev, set(CS_SSTU, SSTP_SSTU_DEFAULT)
				| set(CS_SST, SSTP_SST_DEFAULT), CG_SSP);
}

/* initial display gap programming: ignore both display gaps, and switch
 * memory clock on disp1 vblank only */
static void display_gap_ena(struct pci_dev *dev)
{
	u32 val;

	val = rr32(dev, CG_DISP_GAP_CTL);

	val &= ~(CDGC_DISP1_GAP | CDGC_DISP2_GAP
			| CDGC_DISP1_GAP_MCHG | CDGC_DISP2_GAP_MCHG);
	val |= set(CDGC_DISP1_GAP, CDGC_IGNORE)
			| set(CDGC_DISP2_GAP, CDGC_IGNORE)
			| set(CDGC_DISP1_GAP_MCHG, CDGC_VBLANK)
			| set(CDGC_DISP2_GAP_MCHG, CDGC_IGNORE);

	LOG("display gap programming, CG_DISP_GAP_CTL=0x%08x", val);
	wr32(dev, val, CG_DISP_GAP_CTL);
}

/*
 * Callback for DCE code to notify the dynamic power management related
 * hardware of a display configuration change. This hardware block is insane
 * once you think more than 1/2 display/s...
 * dps_active_cnt:   number of currently active display pipes
 * dps_active_first: index of the first active display pipe
 */
void dyn_pm_new_display_notify(struct device *dev, u8 dps_active_cnt,
							u8 dps_active_first)
{
	struct pci_dev *pdev;
	u32 cg_disp_gap_ctl;
	u32 dccg_disp_slow_select;
	u32 pipe;
	long r;

	pdev = container_of(dev, struct pci_dev, dev);

	cg_disp_gap_ctl = rr32(pdev, CG_DISP_GAP_CTL);
	cg_disp_gap_ctl &= ~(CDGC_DISP1_GAP | CDGC_DISP2_GAP);

	/* gap 1 tracks the first active pipe, or is ignored with no display */
	if (dps_active_cnt) {
		cg_disp_gap_ctl |= set(CDGC_DISP1_GAP,
						CDGC_VBLANK_OR_WATERMARK);
		pipe = dps_active_first;
	} else {
		cg_disp_gap_ctl |= set(CDGC_DISP1_GAP, CDGC_IGNORE);
		pipe = 0;
	}

	/* gap 2 is only meaningful with at least two active displays */
	if (dps_active_cnt >= 2)
		cg_disp_gap_ctl |= set(CDGC_DISP2_GAP,
						CDGC_VBLANK_OR_WATERMARK);
	else
		cg_disp_gap_ctl |= set(CDGC_DISP2_GAP, CDGC_IGNORE);

	/*-------------------------------------------------------------------*/

	wr32(pdev, cg_disp_gap_ctl, CG_DISP_GAP_CTL);

	/* route the "slow" display selection to the chosen pipe */
	dccg_disp_slow_select = rr32(pdev, DCCG_DISP_SLOW_SELECT);
	dccg_disp_slow_select &= ~DDSSR_DISP1_SLOW_SELECT;
	dccg_disp_slow_select |= set(DDSSR_DISP1_SLOW_SELECT, pipe);
	wr32(pdev, dccg_disp_slow_select, DCCG_DISP_SLOW_SELECT);

	/*-------------------------------------------------------------------*/

	/*
	 * XXX: always tell the smc we have displays in order to use GPU
	 * to full performance even with no connected displays
	 */
	r = smc_msg(pdev, SMC_MSG_HAS_DISPLAY);
	if (r == -SI_ERR)
		dev_err(dev, "dyn_pm:display_notify:smc:unable to notify the smc of display new configuration\n");
}

#define VRC_DEFAULT 0xc000b3
static void vc_program(struct pci_dev *dev)
{
	LOG("vc programming, CG_FTV=0x%08x", VRC_DEFAULT);	
	wr32(dev, VRC_DEFAULT, CG_FTV);
}

/* reset CG_FTV to zero */
static void vc_clr(struct pci_dev *dev)
{
	LOG("vc clearing");
	wr32(dev, 0, CG_FTV);
}

/*
 * Copy the memory controller arbiter dram timing/burst time registers from
 * the src hardware set to the dst hardware set, then request the mc to
 * switch to the dst set.
 */
static void arb_sets_cpy_and_switch(struct pci_dev *dev, u8 src, u8 dst)
{
	u32 timing_0;
	u32 timing_1;
	u32 mc_arb_burst_time;
	u32 burst_time;
	u32 mc_cg_cfg;
	u32 mc_arb_cg;

	LOG("arbiter switching from set 0x%02x to set 0x%02x", src, dst);

	timing_0 = 0;
	timing_1 = 0;
	burst_time = 0;
	/*
	 * read once up front: MC_ARB_BURST_TIME holds the fields of all 4
	 * sets, and this also avoids an uninitialized value if src matches
	 * no case below
	 */
	mc_arb_burst_time = rr32(dev, MC_ARB_BURST_TIME);

	switch (src) {
	case MAC_MC_CG_ARB_FREQ_F0:
		timing_0 = rr32(dev, MC_ARB_DRAM_TIMING_0_0);
		timing_1 = rr32(dev, MC_ARB_DRAM_TIMING_0_1);
		burst_time = get(MABT_STATE_0, mc_arb_burst_time);
		break;
	case MAC_MC_CG_ARB_FREQ_F1:
		timing_0 = rr32(dev, MC_ARB_DRAM_TIMING_1_0);
		timing_1 = rr32(dev, MC_ARB_DRAM_TIMING_1_1);
		burst_time = get(MABT_STATE_1, mc_arb_burst_time);
		break;
	case MAC_MC_CG_ARB_FREQ_F2:
		timing_0 = rr32(dev, MC_ARB_DRAM_TIMING_2_0);
		timing_1 = rr32(dev, MC_ARB_DRAM_TIMING_2_1);
		burst_time = get(MABT_STATE_2, mc_arb_burst_time);
		break;
	case MAC_MC_CG_ARB_FREQ_F3:
		timing_0 = rr32(dev, MC_ARB_DRAM_TIMING_3_0);
		timing_1 = rr32(dev, MC_ARB_DRAM_TIMING_3_1);
		burst_time = get(MABT_STATE_3, mc_arb_burst_time);
		break;
	}

	/*
	 * write back the *full* updated burst time register so the fields of
	 * the other sets are preserved (the previous code wrote only the
	 * freshly set field, clobbering the 3 other states)
	 */
	switch (dst) {
	case MAC_MC_CG_ARB_FREQ_F0:
		wr32(dev, timing_0, MC_ARB_DRAM_TIMING_0_0);
		wr32(dev, timing_1, MC_ARB_DRAM_TIMING_0_1);

		mc_arb_burst_time &= ~MABT_STATE_0;
		mc_arb_burst_time |= set(MABT_STATE_0, burst_time);
		wr32(dev, mc_arb_burst_time, MC_ARB_BURST_TIME);
		break;
	case MAC_MC_CG_ARB_FREQ_F1:
		wr32(dev, timing_0, MC_ARB_DRAM_TIMING_1_0);
		wr32(dev, timing_1, MC_ARB_DRAM_TIMING_1_1);

		mc_arb_burst_time &= ~MABT_STATE_1;
		mc_arb_burst_time |= set(MABT_STATE_1, burst_time);
		wr32(dev, mc_arb_burst_time, MC_ARB_BURST_TIME);
		break;
	case MAC_MC_CG_ARB_FREQ_F2:
		wr32(dev, timing_0, MC_ARB_DRAM_TIMING_2_0);
		wr32(dev, timing_1, MC_ARB_DRAM_TIMING_2_1);

		mc_arb_burst_time &= ~MABT_STATE_2;
		mc_arb_burst_time |= set(MABT_STATE_2, burst_time);
		wr32(dev, mc_arb_burst_time, MC_ARB_BURST_TIME);
		break;
	case MAC_MC_CG_ARB_FREQ_F3:
		wr32(dev, timing_0, MC_ARB_DRAM_TIMING_3_0);
		wr32(dev, timing_1, MC_ARB_DRAM_TIMING_3_1);

		mc_arb_burst_time &= ~MABT_STATE_3;
		mc_arb_burst_time |= set(MABT_STATE_3, burst_time);
		wr32(dev, mc_arb_burst_time, MC_ARB_BURST_TIME);
		break;
	}

	/*
	 * *set* MCC_MC_RD_ENA and MCC_IDX (the low 4 bits)
	 * NOTE(review): the previous comment said "clear" but the code sets
	 * them, which matches the reference implementation
	 */
	mc_cg_cfg = rr32(dev, MC_CG_CFG) | 0x0000000f;
	wr32(dev, mc_cg_cfg, MC_CG_CFG);

	/* request the arbiter to switch to the dst set */
	mc_arb_cg = rr32(dev, MC_ARB_CG);
	mc_arb_cg &= ~MAC_CG_ARB_REQ;
	mc_arb_cg |= set(MAC_CG_ARB_REQ, dst);
	wr32(dev, mc_arb_cg, MC_ARB_CG);
}

/* switch the memory controller arbiter back to the f0 hardware set */
static void arb_set_f0(struct pci_dev *dev)
{
	u8 cur;

	LOG("setting memory controller arbiter 0x%02x hardware set of registers",
							MAC_MC_CG_ARB_FREQ_F0);

	cur = smc_tbls_cur_arb_set_get(dev);

	/* nothing to do when the smc already switched to f0 */
	if (cur == MAC_MC_CG_ARB_FREQ_F0) {
		LOG("the smc has switch the memory controller arbiter to use 0x%02x hardward set of registers",
							MAC_MC_CG_ARB_FREQ_F0);
		return;
	}

	/*
	 * XXX: copy the previous frequency set?? Shouldn't use bear f0 without
	 * a copy?
	 */
	arb_sets_cpy_and_switch(dev, cur, MAC_MC_CG_ARB_FREQ_F0);
}

/* program the calculation accumulator window from the pwrtune parameters */
static void cac_wnd_set(struct pci_dev *dev)
{
	struct pwrtune *pt;
	u32 val;

	pt = pwrtune_get(dev);

	val = rr32(dev, CG_CAC_CTL);
	val = (val & ~CCC_CAC_WND) | set(CCC_CAC_WND, pt->cac_wnd);

	LOG("calculation accumulator window set to 0x%08x", pt->cac_wnd);
	wr32(dev, val, CG_CAC_CTL);
}

/* enable deep sleep: select its clock and allow pll autoscale on spread
 * spectrum clear */
static void ds_ena(struct pci_dev *dev)
{
	u32 val;

	LOG("enabling deep sleep");

	val = rr32(dev, MISC_CLK_CTL);
	val = (val & ~MCC_DEEP_SLEEP_CLK_SEL)
					| set(MCC_DEEP_SLEEP_CLK_SEL, 1);
	LOG("MISC_CLK_CTL=0x%08x", val);
	wr32(dev, val, MISC_CLK_CTL);

	val = rr32(dev, CG_ENG_PLL_AUTOSCALE_CTL) | CEPAC_AUTOSCALE_ON_SS_CLR;
	LOG("CG_ENG_PLL_AUTOSCALE_CTL=0x%08x", val);
	wr32(dev, val, CG_ENG_PLL_AUTOSCALE_CTL);
}

/*
 * Snapshot the clock related registers as programmed at module load; these
 * values are later used as the baseline when computing new pll settings
 * (see eng_pll_compute).
 */
static void clks_regs_save(struct pci_dev *dev)
{
	struct dev_drv_data *dd;

	dd = pci_get_drvdata(dev);

	LOG("saving clock registers, based on module loading state");

	dd->pp.clks_regs.cg_eng_pll_func_ctl_0 = rr32(dev,
							CG_ENG_PLL_FUNC_CTL_0);
	dd->pp.clks_regs.cg_eng_pll_func_ctl_1 = rr32(dev,
							CG_ENG_PLL_FUNC_CTL_1);
	dd->pp.clks_regs.cg_eng_pll_func_ctl_2 = rr32(dev,
							CG_ENG_PLL_FUNC_CTL_2);
	dd->pp.clks_regs.cg_eng_pll_func_ctl_3 = rr32(dev,
							CG_ENG_PLL_FUNC_CTL_3);
	dd->pp.clks_regs.cg_eng_pll_ss_0 = rr32(dev, CG_ENG_PLL_SS_0);
	dd->pp.clks_regs.cg_eng_pll_ss_1 = rr32(dev, CG_ENG_PLL_SS_1);
	dd->pp.clks_regs.dll_ctl = rr32(dev, DLL_CTL);
	dd->pp.clks_regs.mem_clk_pm_ctl = rr32(dev, MEM_CLK_PM_CTL);
	dd->pp.clks_regs.mem_pll_ad_func_ctl = rr32(dev, MEM_PLL_AD_FUNC_CTL);
	dd->pp.clks_regs.mem_pll_dq_func_ctl = rr32(dev, MEM_PLL_DQ_FUNC_CTL);
	dd->pp.clks_regs.mem_pll_func_ctl_0 = rr32(dev, MEM_PLL_FUNC_CTL_0);
	dd->pp.clks_regs.mem_pll_func_ctl_1 = rr32(dev, MEM_PLL_FUNC_CTL_1);
	dd->pp.clks_regs.mem_pll_func_ctl_2 = rr32(dev, MEM_PLL_FUNC_CTL_2);
	dd->pp.clks_regs.mem_pll_ss_0 = rr32(dev, MEM_PLL_SS_0);
	dd->pp.clks_regs.mem_pll_ss_1 = rr32(dev, MEM_PLL_SS_1);
}

/* record the pcie generation currently programmed in the hardware; used as
 * fall back by pcie_speed_cap() */
static void default_pcie_get(struct pci_dev *dev)
{
	struct dev_drv_data *dd = pci_get_drvdata(dev);

	dd->pp.default_pcie_gen = bif_pcie_gen_get(dev);
	LOG("default pcie gen is %u", dd->pp.default_pcie_gen + 1);
}

/* turn on the static power management bit of GENERAL_PM */
static void static_pm_ena(struct pci_dev *dev)
{
	u32 val = rr32(dev, GENERAL_PM) | GP_STATIC_PM_ENA;

	LOG("enabling static power management, GENERAL_PM=0x%08x", val);
	wr32(dev, val, GENERAL_PM);
}

/* turn off the static power management bit of GENERAL_PM */
static void static_pm_dis(struct pci_dev *dev)
{
	u32 val = rr32(dev, GENERAL_PM) & ~GP_STATIC_PM_ENA;

	LOG("disabling static power management, GENERAL_PM=0x%08x", val);
	wr32(dev, val, GENERAL_PM);
}

/* engine clock pm is active low: clear the off bit to enable it */
static void eng_clk_pm_on(struct pci_dev *dev)
{
	u32 val = rr32(dev, ENG_CLK_PM_CTL) & ~ECPC_ENG_CLK_PM_OFF;

	LOG("enabling engine clock power management, ENG_CLK_PM_CTL=0x%08x",
									val);
	wr32(dev, val, ENG_CLK_PM_CTL);
}

/* turn on the global power management bit of GENERAL_PM */
static void global_dyn_pm_ena(struct pci_dev *dev)
{
	u32 val = rr32(dev, GENERAL_PM) | GP_GLOBAL_PM_ENA;

	LOG("enabling global power management, GENERAL_PM=0x%08x", val);
	wr32(dev, val, GENERAL_PM);
}

/* select the digital source for dynamic pm thermal events */
static void thermal_evt_src_set_digital(struct pci_dev *dev)
{
	u32 val;

	val = rr32(dev, CG_THERMAL_CTL);
	val = (val & ~CTC_DYN_PM_EVT_SRC)
			| set(CTC_DYN_PM_EVT_SRC, CTC_EVT_SRC_DIGITAL);

	LOG("programming thermal event source to digital source CG_THERMAL_CTL=0x%08x",
									val);
	wr32(dev, val, CG_THERMAL_CTL);
}

#define TEMP_LOW	90
#define TEMP_HIGH	120 
/* program the thermal interrupt thresholds and the digital high temperature
 * used by dynamic pm */
static void thermal_temp_rng_set(struct pci_dev *dev)
{
	u32 intr;
	u32 ctl;

	LOG("programming thermal temperature range");

	intr = rr32(dev, CG_THERMAL_INT);
	intr &= ~(CTI_TEMP_LOW | CTI_TEMP_HIGH);
	intr |= set(CTI_TEMP_LOW, TEMP_LOW) | set(CTI_TEMP_HIGH, TEMP_HIGH);
	LOG("CG_THERMAL_INT=0x%08x", intr);
	wr32(dev, intr, CG_THERMAL_INT);

	ctl = rr32(dev, CG_THERMAL_CTL);
	ctl = (ctl & ~CTC_DYN_PM_DIGITAL_TEMP_HIGH)
			| set(CTC_DYN_PM_DIGITAL_TEMP_HIGH, TEMP_HIGH);
	LOG("CG_THERMAL_CTL=0x%08x", ctl);
	wr32(dev, ctl, CG_THERMAL_CTL);
}

/* one-time preparation before bringing up the dyn pm hardware */
static void pre_init(struct pci_dev *dev)
{
	/*
	 * The default values of clk regs are taken directly from the hw.
	 * Hopefully those vals are the initial state vals since we will
	 * use them for that purpose.
	 */
	clks_regs_save(dev);

	/* The initial pcie generation is read from the hw */
	default_pcie_get(dev);

	/*
	 * during the smc tbls init, in order to compute the dram timings for
	 * the mc arb, an atombios tbl is used. But that atombios tbl is
	 * actually programming the hw. To be more accurate, that atombios tbl
	 * is programming the mc arb reg set f0. The mc arb has 4 hw sets of
	 * regs. Then the currently used hw set is switched to f1 to let us
	 * use the hw set f0 to compute dram timings without touching the
	 * currently used timings.
	 * Please amd, fix this with a clean atombios tbl! :)
	 */
	arb_sets_cpy_and_switch(dev, MAC_MC_CG_ARB_FREQ_F0,
							MAC_MC_CG_ARB_FREQ_F1);

	/*
	 * Be sure we don't access the smc ram in incremental mode. The
	 * incremental mode is only used for firmware loading.
	 */
	smc_auto_increment_dis(dev);
}

/*
 * Bring the dynamic power management hardware to a basic running state:
 * enable the pm blocks advertised by the context capabilities, program the
 * clock/throttling registers, then load and start the smc.
 * Returns 0 on success, -SI_ERR on failure; on failure the blocks enabled so
 * far (tracked in ctx->state) are rolled back.
 */
static long init(struct ctx *ctx)
{
	long r;

	static_pm_ena(ctx->dev);

	if (ctx->volt_caps & VOLT_CAPS_VDDC_CTL_ENA) {
		LOG("enabling voltage power management");
		general_volt_pm_ena(ctx->dev);
		ctx->state |= STATE_VOLT_PM_ENA;
	} else
		LOG("voltage power management not enabled");

	mc_lp_regs_init(ctx->dev);

	if (ctx->misc_caps & MISC_CAPS_ENG_CLK_SS_ENA) {
		LOG("enabling spread spectrum management");
		general_ss_ena(ctx->dev);
		ctx->state |= STATE_SS_ENA;
	} else
		LOG("spread spectrum not enabled");

	if (ctx->misc_caps & MISC_CAPS_THERMAL_PROTECTION_ENA) {
		LOG("enabling thermal protection");
		general_thermal_protection_ena(ctx->dev);
		ctx->state |= STATE_THERMAL_PROTECTION_ENA;
	} else
		LOG("thermal protection not enabled");

	/* program the throttling/period/gap registers */
	b_sp_program(ctx);
	git_program(ctx->dev);
	tp_program(ctx->dev);
	tpp_program(ctx->dev);
	sstp_program(ctx->dev);
	display_gap_ena(ctx->dev);
	vc_program(ctx->dev);

	/* load the smc firmware with its clock stopped */
	LOG("smc reset");
	smc_reset(ctx->dev);
	LOG("smc clock stop");
	smc_clk_stop(ctx->dev);
	LOG("smc ucode program");
	smc_ucode_program(ctx->dev);

	r = smc_tbls_init(ctx);
	if (r == -SI_ERR)
		goto err_clr_vc;
	smc_sw_regs_init(ctx);
#ifdef CONFIG_ALGA_AMD_SI_DYN_PM_LOG
	smc_sw_regs_dump(ctx->dev);
#endif

	/* ultra low voltage, only when atombios provides lvls */
	if (ctx->atb_ulv.lvls_n)
		ulv_program(ctx->dev);

	cac_wnd_set(ctx->dev);
	smc_cac_cfg_regs_init(ctx->dev);
	ds_ena(ctx->dev);
	LOG("starting smc");
	smc_initial_jmp_setup(ctx->dev);
	smc_start(ctx->dev);
	smc_clk_start(ctx->dev);

	LOG("tell the smc there is no active display");
	r = smc_msg(ctx->dev, SMC_MSG_NO_DISPLAY);
	if (r == -SI_ERR) {
		dev_err(&ctx->dev->dev, "dyn_pm:init:unable to tell the smc there is no active display\n");
		goto err_clr_vc;
	}

	eng_clk_pm_on(ctx->dev);
	global_dyn_pm_ena(ctx->dev);
	if (ctx->misc_caps & MISC_CAPS_THERMAL_PROTECTION_ENA) {
		thermal_temp_rng_set(ctx->dev);
		/* TODO: we should enable the thermal interrupt here */
		thermal_evt_src_set_digital(ctx->dev);
	}
	return 0;

	/* rollback: disable only the blocks we actually enabled */
err_clr_vc:
	vc_clr(ctx->dev);

	if (ctx->state & STATE_THERMAL_PROTECTION_ENA)
		general_thermal_protection_dis(ctx->dev);

	if (ctx->state & STATE_SS_ENA)
		general_ss_dis(ctx->dev);

	if (ctx->state & STATE_VOLT_PM_ENA)
		general_volt_pm_dis(ctx->dev);

	static_pm_dis(ctx->dev);
	arb_set_f0(ctx->dev);
	return -SI_ERR;
}

/*
 * Enable dynamic power management: build a context, initialize the dyn pm
 * hardware and switch it to the performance state.
 * Returns 0 on success, -SI_ERR on failure.
 */
long dyn_pm_ena(struct pci_dev *dev)
{
	long r;
	struct ctx *ctx;

	/* this is too big for the stack */
	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		dev_err(&dev->dev, "dyn_pm:unable to allocate memory for the context\n");
		goto err;
	}

	r = ctx_init(dev, ctx);
	if (r == -SI_ERR)
		goto err_free_ctx;

	pre_init(dev);

	r = init(ctx);
	if (r == -SI_ERR)
		goto err_free_ctx;

	/*
	 * we have the dyn pm hw running in a basic state, now time to switch to
	 * a usefull pwr state
	 */
	r = driver_set_performance(ctx);
	if (r == -SI_ERR)
		goto err_free_ctx;

	/* TODO: check the pcie link is retrained to max speed by the smc */

	/* uvd power off failure is not fatal, only warn */
	LOG("switching off the universal video decoder (because mpeg)");
	r = smc_msg(ctx->dev, SMC_MSG_UVD_PWR_OFF);
	if (r == -SI_ERR)
		dev_warn(&ctx->dev->dev, "dyn_pm:init:smc:unable to switch off the universal video decoder block (it does only mpeg)\n");

	ctx_free(ctx);

	return 0;

err_free_ctx:
	ctx_free(ctx);

err:
	return -SI_ERR;
}

/* this is an attempt to disable the dyn pm */
void dyn_pm_dis(struct pci_dev *dev)
{
	long r;
	struct dev_drv_data *dd;

	dd = pci_get_drvdata(dev);

	vc_clr(dev);

	/*--------------------------------------------------------------------*/
	r = atb_have_thermal_protection(dd->atb);
	if (r == -ATB_ERR) {
		dev_err(&dev->dev, "dyn_pm:unable to look for thermal protection/controller\n");
	} else if (r == ATB_HAVE_THERMAL_PROTECTION) {
		LOG("disabling thermal protection");
		general_thermal_protection_dis(dev);
	} else
		LOG("thermal protection not disabled");
	/*--------------------------------------------------------------------*/

	/*--------------------------------------------------------------------*/
	r = atb_have_eng_clk_ss(dd->atb);
	if (r == -ATB_ERR) {
		dev_err(&dev->dev, "dyn_pm:unable to look for engine spread spectrum support\n");
	} else if (r == ATB_HAVE_ENG_CLK_SS) {
		LOG("disabling spread spectrum management");
		general_ss_dis(dev);
	} else
		LOG("spread spectrum not disabled");
	/*--------------------------------------------------------------------*/

	/*--------------------------------------------------------------------*/
	r = atb_have_vddc_ctl(dd->atb);
	if (r == -ATB_ERR) {
		dev_err(&dev->dev, "dyn_pm:unable to look for vddc control\n");
	} else if (r == ATB_HAVE_VDDC_CTL) {
		LOG("disabling voltage power management");
		general_volt_pm_dis(dev);
	} else 
		LOG("voltage power management not disabled");
	/*--------------------------------------------------------------------*/

	static_pm_dis(dev);
	arb_set_f0(dev);
}
